# -*- coding: utf-8 -*-

# Scrapy settings for ScrapyBaidu project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

# Project identity: the bot name and the spider package(s).
BOT_NAME = "ScrapyBaidu"

# The package used for newly generated spiders is also the only package
# listed for spider discovery.
NEWSPIDER_MODULE = "ScrapyBaidu.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'ScrapyBaidu (+http://www.yourdomain.com)'

# robots.txt rules are not obeyed by this project.
ROBOTSTXT_OBEY = False

# Concurrency is capped at one request at a time: globally (Scrapy's default
# for CONCURRENT_REQUESTS is 16), per domain and per IP.
CONCURRENT_REQUESTS = CONCURRENT_REQUESTS_PER_DOMAIN = CONCURRENT_REQUESTS_PER_IP = 1

# No fixed delay between requests to the same website (default: 0).
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 0

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#    'ScrapyBaidu.middlewares.ScrapybaiduSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# No custom downloader middleware is currently enabled. The project entries
# below are disabled; move one into the mapping to activate it:
#   'ScrapyBaidu.middlewares.BaiduIndexUserAgentMiddleware': 543,
#   'ScrapyBaidu.middlewares.BaiduIndexProxyMiddleware': 542,
DOWNLOADER_MIDDLEWARES = {}

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
# A single item pipeline is enabled, registered with order value 300.
ITEM_PIPELINES = {"ScrapyBaidu.pipelines.ScrapybaiduPipeline": 300}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Start and end dates of the crawl, written as YYYY-MM-DD strings.
START_DATE, END_DATE = "2019-01-01", "2019-09-30"

# Database connection settings.
#
# Each value may be overridden with an environment variable of the same name,
# so real credentials do not have to be committed to the repository. When a
# variable is unset, the original default shown below is used unchanged.
import os

MYSQL_HOST = os.environ.get("MYSQL_HOST", "127.0.0.1")
MYSQL_DBNAME = os.environ.get("MYSQL_DBNAME", "baiduindexdb")  # database name
MYSQL_USER = os.environ.get("MYSQL_USER", "root")  # database account
MYSQL_PASSWD = os.environ.get("MYSQL_PASSWD", "123456")  # database password
# Environment values are strings; the port is always exposed as an int.
MYSQL_PORT = int(os.environ.get("MYSQL_PORT", 3306))

# IP pool (presumably proxy endpoints for the proxy middleware -- confirm).
#
# Scrapy only copies module attributes whose names are entirely upper-case
# into its Settings object, so a mixed-case name such as ``IP_Pool`` cannot be
# read through ``crawler.settings``. The list is therefore defined under the
# upper-case name ``IP_POOL``; ``IP_Pool`` is kept below as an alias so that
# code importing the old name directly from this module keeps working.
IP_POOL = [
    "https://58.218.200.229:8794",
    "https://58.218.200.229:8793",
    "https://58.218.200.247:8781",
    "https://58.218.200.237:5180",
    "https://58.218.200.237:5170",
    "https://58.218.200.229:8760",
    "https://58.218.200.247:8775",
    "https://58.218.200.247:8770",
    "https://58.218.200.229:8772",
    "https://58.218.200.247:8765",
    "https://58.218.200.229:8770",
    "https://58.218.200.237:5177",
    "https://58.218.200.237:5184",
    "https://58.218.200.249:8764",
    "https://58.218.200.247:8760",
    "https://58.218.200.248:8787",
    "https://58.218.200.248:8794",
    "https://58.218.200.248:8753",
    "https://58.218.200.248:8788",
    "https://58.218.200.248:8791",
    "https://58.218.200.248:8770",
    "https://58.218.200.249:8777",
    "https://58.218.200.247:8753",
    "https://58.218.200.247:8772",
    "https://58.218.200.237:5191",
    "https://58.218.200.249:8768",
    "https://58.218.200.249:8790",
    "https://58.218.200.237:5190",
    "https://58.218.200.247:8789",
    "https://58.218.200.249:8775",
    "https://58.218.200.237:5152",
    "https://58.218.200.247:8756",
    "https://58.218.200.237:5174",
    "https://58.218.200.237:5185",
    "https://58.218.200.248:8769",
    "https://58.218.200.248:8783",
    "https://58.218.200.248:8771",
    "https://58.218.200.248:8785",
    "https://58.218.200.248:8750",
    "https://58.218.200.249:8776",
    "https://58.218.200.237:5156",
    "https://58.218.200.229:8759",
    "https://58.218.200.249:8762",
    "https://58.218.200.249:8765",
    "https://58.218.200.248:8754",
    "https://58.218.200.237:5168",
    "https://58.218.200.247:8793",
    "https://58.218.200.247:8795",
    "https://58.218.200.237:5194",
    "https://58.218.200.237:5196",
]

# Backward-compatible alias: the same list object under its original name.
IP_Pool = IP_POOL

# Cookie pool: a list of cookie dicts, each holding a single "BDUSS" value.
# NOTE(review): these look like login session tokens -- confirm whether they
# should live in version control at all.
COOKIES = [
    {"BDUSS": "jBXYUdGZnhmZkxaMlhoM2hDQVhVQjJrR3N5R1c1VDhHQUxRN2tmNGsxfkhWS2hjQVFBQUFBJCQAAAAAAAAAAAEAAACcYXxwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMfHgFzHx4BcR"},
    {"BDUSS": "XludUlpaUdXSTZWcGRwTWRnMi1XV0UxMkpieURNUDFENC1ULUotc3U3TUlWYWhjQVFBQUFBJCQAAAAAAAAAAAEAAACHQ7dnAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjIgFwIyIBcR"},
    {"BDUSS": "xrOUFBSFVzQUdWRWxqNVcyem9jWFY3SkV6emZjLUtacGhoM1dwcUVnUkhWYWhjQVFBQUFBJCQAAAAAAAAAAAEAAACAciBDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEfIgFxHyIBcbW"},
    {"BDUSS": "DNGZThVVUNDcHFOZEViTGdDSWZrVENTS3diSmdaNjFrZUExem1oM1pJajFWYWhjQVFBQUFBJCQAAAAAAAAAAAEAAACypZG3AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPXIgFz1yIBcS"},
    {"BDUSS": "ktqYVB1OG91SjB0VU15dUNtfkY0V3dZZ1ZzVU1kRnZWV0N-cXBMS1VjWXVWcWhjQUFBQUFBJCQAAAAAAAAAAAEAAADjHj91AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC7JgFwuyYBcT"},
    {"BDUSS": "mlkRG14MzFENHk0Tk10SW5uTWF6T0d4Yk0xdjJBLVBGNkVyS0YzeDJqamRWcWhjQVFBQUFBJCQAAAAAAAAAAAEAAAAqPbBQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAN3JgFzdyYBcM"},
    {"BDUSS": "jFqRmp-NmNOOVpKVGl3eWdMaTR2MG4zdDJYSm9wQVpHVXltRmxXTEFRUWdWNmhjQVFBQUFBJCQAAAAAAAAAAAEAAABayA6pAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACDKgFwgyoBcc"},
    {"BDUSS": "2NDfkZ3bnhVWmtaWnA3N2FuMG1YU24wfm8zMFdnTG14OERZdjZoUS1vVmpWNmhjQVFBQUFBJCQAAAAAAAAAAAEAAAChAAZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGPKgFxjyoBcM"},
    {"BDUSS": "VZRMk5KSDZhSGlOWnF3UmF-Yng1bVdMNUcta2pQdU1-Z0RidH5PU2pDU3NWNmhjQVFBQUFBJCQAAAAAAAAAAAEAAADRIWrPAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKzKgFysyoBcT"},
    {"BDUSS": "TJxNWxnUnpZMVhrZnpXZ3RtTlFaUUIyUEw5aEctQ0lJZm15Szd0WkpMUGpWNmhjQVFBQUFBJCQAAAAAAAAAAAEAAAAj-JkGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOPKgFzjyoBcO"},
    {"BDUSS": "GRpVW5KeDNGUndVdU9DbEFGRnZnWXRBR2FPa1FoNm9GUmRzRmlZenRUT3RXcWhjQVFBQUFBJCQAAAAAAAAAAAEAAAANxD3EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAK3NgFytzYBcR"},
    {"BDUSS": "FobEhqRDZQbGdHVTFHUEFDMXNZcFJqN09tYi1Jbi05QX5Tb2NDZ09uUFZXcWhjQVFBQUFBJCQAAAAAAAAAAAEAAAAxY-IEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANXNgFzVzYBccz"},
    {"BDUSS": "dQZGM0MlpsMnczRXk1cjNKeUdtMjdxbkdPOFhkZnllRX5ha0NEY3FXanpXcWhjQVFBQUFBJCQAAAAAAAAAAAEAAAArvxgaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPPNgFzzzYBcUj"},
    {"BDUSS": "EpmbG8zQXdJMFlBNGV3d1RJbTR5QmRILVZiaU5hZU1EcWFCT3JmVEFtc3hXNmhjQUFBQUFBJCQAAAAAAAAAAAEAAAAzd8OiAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADHOgFwxzoBcd"},
    {"BDUSS": "JLZlVTVHFBUmZEQXJrWG9SMVNCNnNqNGF1ZmQ5M1JsVG42VEUwVn5pOXdXNmhjQVFBQUFBJCQAAAAAAAAAAAEAAABJ8bQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHDOgFxwzoBcNG"},
    {"BDUSS": "URnUDF4SHU0blMxaElPTzVibVI2WTd3Nzd3UDhZeFBncHJzOEdpMTlwaXZXNmhjQVFBQUFBJCQAAAAAAAAAAAEAAABX3eFtAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAK~OgFyvzoBcT"},
    {"BDUSS": "3YyTmk3VWU4QXJOWmttLVRHVkY1bVRYamNDd1hGTnFpeHdTeXV5VDA4dm9XNmhjQVFBQUFBJCQAAAAAAAAAAAEAAAC836cBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOjOgFzozoBcM"},
    {"BDUSS": "C1vb3ZENXNJYXdENkpMWFdVTmE3SGF4UEVhOVNVVy1JamdmaE5iTH5GMGpYS2hjQVFBQUFBJCQAAAAAAAAAAAEAAAAEkr63AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACPPgFwjz4Bca"},
    {"BDUSS": "m5DaUd0ZXRnUlp-UExGS350Zm9VZFFMeU9hRWtPTTd4UWJMM3NVUlg2QmFYS2hjQVFBQUFBJCQAAAAAAAAAAAEAAAAEdge3AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFrPgFxaz4BcM"},
    {"BDUSS": "VYcmpOSjlHdFUwUklta1VUTEZ-VDQxWTdVdFROMGZwWUhrYX5yWDhPYWVYS2hjQUFBQUFBJCQAAAAAAAAAAAEAAAAGeIkuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ7PgFyez4BcNj"},
    {"BDUSS": "ZvbzZqWVhEMzJxVkc5Zzl0QnU3bnotMFlDM1lua0xDSko0NXZGU1BTamRYcWhjQUFBQUFBJCQAAAAAAAAAAAEAAAD~fYcZAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAN3RgFzd0YBcTj"},
    {"BDUSS": "xiYmNSbTZNbjhKeDhSWE5DRllHcFF1RlAzS35rcUMxfjN2ZVhpQ3I2TWhYNmhjQUFBQUFBJCQAAAAAAAAAAAEAAABmdxqYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACHSgFwh0oBcS2"},
    {"BDUSS": "pTelRjWWxpNFZPeTVpQ2dDdEVMUTFuRWpYems4dk9lZ2hnRE0zWWdJNW5YNmhjQUFBQUFBJCQAAAAAAAAAAAEAAAAyficCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGfSgFxn0oBcd0"},
    {"BDUSS": "FhWXpMeWdpeEZ4ejFRamduOW5-VGwyOE1mY2tvRDZ5QjhVZ3ZQRkVCSHRXcWhjQUFBQUFBJCQAAAAAAAAAAAEAAADGYwlDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAO3NgFztzYBcSG"},
    {"BDUSS": "I2WGg3SH5TLXFUbmVGZU1rODRqWFl1S0hQOFBwflh2dEw5N1o1OFF0QnZXNmhjQUFBQUFBJCQAAAAAAAAAAAEAAADOFqgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG~OgFxvzoBcTW"},
    {"BDUSS": "zZ6aVU4b2tjdmgtTTI3M05VMmhOR3dQZzg5akJwcEdUU21Pfmh2U0ttYnNXNmhjQUFBQUFBJCQAAAAAAAAAAAEAAAC36EJaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOzOgFzszoBcU"},
    {"BDUSS": "WpaNjNDQlZqczdhZ1RKYmF4OWkycmxYcnN2elczdmJpNjNReWpGbk9uUTZYS2hjQUFBQUFBJCQAAAAAAAAAAAEAAABKzxCsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADrPgFw6z4BcU"},
    {"BDUSS": "0Y2SVdRY3dWLXRJbWZxYmhVRXBtNjhTLWdkcFhnZE1Zb20tQklxVnZ-cUpYS2hjQUFBQUFBJCQAAAAAAAAAAAEAAAA8eWhjAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAInPgFyJz4BcY"},
    {"BDUSS": "FB-OVA1QUFscUFmNlFSNTJqV0hiTzRJOExOMVI1a0J6WjFWeWhVUE1jblRYS2hjQUFBQUFBJCQAAAAAAAAAAAEAAAAbj35dAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANPPgFzTz4Bcd"},
    {"BDUSS": "Ux0M09WSnVEb3RQa2JaaFZLekhpNU94SEtjU1JpWDF2aHpuaTBKbWNZTWNYYWhjQUFBQUFBJCQAAAAAAAAAAAEAAAAaebcZAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABzQgFwc0IBcU"},
    {"BDUSS": "JpNjUtVUhidDA3Nn52SVFYVEFTQ3FNb3dRaG00WC1KYjc1Q1ZMY2ZGbHpYYWhjQUFBQUFBJCQAAAAAAAAAAAEAAAD5xq9vAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHPQgFxz0IBcQn"},
    {"BDUSS": "lWa1N5dDY3YkxhQmhXTWNqOVhuRHFVSEtubno1N0p3V2ltfjEzNkpiVzRYYWhjQUFBQUFBJCQAAAAAAAAAAAEAAAAoPTyGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALjQgFy40IBcVn"},
    {"BDUSS": "ZvVlQtanlPQjhkRHdNc2hGT01RZUZrVG1xaTJRRVNJaXA4bTJrVG9BVUJYcWhjQUFBQUFBJCQAAAAAAAAAAAEAAADDAwKtAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHRgFwB0YBcbT"},
    {"BDUSS": "V4N01XMFlxREJUNGIyVkJucXF5bFNwTElUcDR5cFNVdW1UbGxPMW5VdFJYcWhjQUFBQUFBJCQAAAAAAAAAAAEAAABtyDwZAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFHRgFxR0YBcOT"},
    {"BDUSS": "5tdkZjMGNDSzZNMlBNdmJ4UVJyMEJhcGl5dTIwc2FXR2tlb2pPc1JOYThYcWhjQUFBQUFBJCQAAAAAAAAAAAEAAADmmJzmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALzRgFy80YBcb2"},
    {"BDUSS": "UlwUkhLRDIyeHAwNllTUXhYMEtlVFQwa3dUTXQyTmVWRS1BUUJRQm1XUXdYNmhjQUFBQUFBJCQAAAAAAAAAAAEAAACL-Q09AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADDSgFww0oBcV"},
]
